import pandas as pd

# Load both workbooks: `a` is the table to be updated, `b` supplies the
# replacement values.
a = pd.read_excel('去重结果_合并去重结果2.xlsx')
b = pd.read_excel('去重结果_无知识产权替换b.xlsx')

# Lookup table keyed by credit code, holding only the two columns that are
# copied from `b` into `a`.
b_sub = b.set_index('unifiedSocialcreditCode')[['category', 'industry']]

# Codes from `a` that do not appear in `b`; appended to by update_row.
not_covered_codes = []

def update_row(row):
    """Return ``row`` with category/industry replaced by the values from ``b_sub``.

    The row's ``unifiedSocialcreditCode`` is looked up in the module-level
    ``b_sub`` frame (indexed by that code). On a match, ``category`` and
    ``industry`` are overwritten in a copy of the row. Without a match the
    code is appended to the module-level ``not_covered_codes`` list and the
    row is returned unchanged.

    Args:
        row: One row of ``a`` as a Series, as passed by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        A Series with the same labels as ``row``.
    """
    code = row['unifiedSocialcreditCode']
    if code not in b_sub.index:
        not_covered_codes.append(code)
        return row

    # Work on a copy: functions passed to DataFrame.apply should not mutate
    # the object they receive.
    updated = row.copy()
    for column in ('category', 'industry'):
        value = b_sub.loc[code, column]
        # If the code occurs more than once in b_sub, .loc yields a Series
        # instead of a scalar; use the first match so that a whole Series is
        # never written into a single cell.
        if isinstance(value, pd.Series):
            value = value.iloc[0]
        updated[column] = value
    return updated

# Run the per-row replacement over every row of `a`.
a_updated = a.apply(func=update_row, axis='columns')

# Write the updated table, without the row index.
a_updated.to_excel('无知识产权门类_result.xlsx', index=False)

# Write the codes from `a` that had no match in `b`.
pd.DataFrame(
    {'unifiedSocialcreditCode': not_covered_codes},
).to_csv('a未被b覆盖的code2.csv', index=False)